{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "\n",
    "import os\n",
    "import numpy\n",
    "import itertools\n",
    "from sklearn.utils import shuffle\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Execute rpy2_setup.sh to install dependencies\n",
    "import rpy2.robjects as robjects\n",
    "from rpy2.robjects import pandas2ri\n",
    "from rpy2.robjects import numpy2ri\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "pandas2ri.activate()\n",
    "numpy2ri.activate()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load RData as Pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# R's load() restores the saved objects into R's global environment and returns\n",
    "# only their names, so its return value is not kept; the data itself is read\n",
    "# from robjects.globalenv below and converted to pandas.\n",
    "robjects.r['load'](\"../data/TEP_FaultFree_Training.RData\")\n",
    "robjects.r['load'](\"../data/TEP_FaultFree_Testing.RData\")\n",
    "\n",
    "# Fixed seed so that Restart & Run All produces the same row order every time.\n",
    "SHUFFLE_SEED = 42\n",
    "fault_free_training_df = shuffle(\n",
    "    pandas2ri.ri2py_dataframe(robjects.globalenv[\"fault_free_training\"]),\n",
    "    random_state=SHUFFLE_SEED)\n",
    "fault_free_testing_df = shuffle(\n",
    "    pandas2ri.ri2py_dataframe(robjects.globalenv[\"fault_free_testing\"]),\n",
    "    random_state=SHUFFLE_SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# R's load() restores the saved objects into R's global environment and returns\n",
    "# only their names, so its return value is not kept; the data itself is read\n",
    "# from robjects.globalenv below and converted to pandas.\n",
    "robjects.r['load'](\"../data/TEP_Faulty_Training.RData\")\n",
    "robjects.r['load'](\"../data/TEP_Faulty_Testing.RData\")\n",
    "\n",
    "# Fixed seed so that Restart & Run All produces the same row order every time.\n",
    "SHUFFLE_SEED = 42\n",
    "faulty_training_df = shuffle(\n",
    "    pandas2ri.ri2py_dataframe(robjects.globalenv[\"faulty_training\"]),\n",
    "    random_state=SHUFFLE_SEED)\n",
    "faulty_testing_df = shuffle(\n",
    "    pandas2ri.ri2py_dataframe(robjects.globalenv[\"faulty_testing\"]),\n",
    "    random_state=SHUFFLE_SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---------------------------------------------------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get Data Stream Iterator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_input_fn(data_set, num_epochs=None, shuffle=True):\n",
    "    \"\"\"Build an estimator input function backed by the DataFrame `data_set`.\n",
    "\n",
    "    Args:\n",
    "        data_set: DataFrame containing the 'faultNumber', 'simulationRun' and\n",
    "            'sample' columns in addition to the feature columns.\n",
    "        num_epochs: forwarded unchanged to `pandas_input_fn` (default None).\n",
    "        shuffle: forwarded unchanged to `pandas_input_fn` (default True). Inside\n",
    "            this function the name shadows the imported `sklearn.utils.shuffle`.\n",
    "\n",
    "    Returns:\n",
    "        The result of `tf.estimator.inputs.pandas_input_fn`, which the notebook\n",
    "        passes to the estimator's `train` and `predict` calls.\n",
    "    \"\"\"\n",
    "    # 'faultNumber' is the label; it, 'simulationRun' and 'sample' are excluded\n",
    "    # from the features.\n",
    "    features = data_set.drop(['faultNumber', 'simulationRun', 'sample'], axis=1)\n",
    "    labels = data_set['faultNumber']\n",
    "    return tf.estimator.inputs.pandas_input_fn(\n",
    "        x=features, y=labels, num_epochs=num_epochs, shuffle=shuffle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_feature_cols(data_set):\n",
    "    \"\"\"Return one numeric feature column per feature column of `data_set`.\n",
    "\n",
    "    'faultNumber', 'simulationRun' and 'sample' are dropped first; each remaining\n",
    "    column name is wrapped in `tf.feature_column.numeric_column`.\n",
    "    \"\"\"\n",
    "    feature_frame = data_set.drop(['faultNumber', 'simulationRun', 'sample'], axis=1)\n",
    "    feature_cols = []\n",
    "    for column_name in feature_frame.keys():\n",
    "        feature_cols.append(tf.feature_column.numeric_column(column_name))\n",
    "    return feature_cols"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Simple Neural Network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only the faulty runs are used here. copy(deep=False) makes shallow copies, so the\n",
    "# new frames share their data with the originals instead of duplicating it.\n",
    "train_df, test_df = (\n",
    "    frame.copy(deep=False) for frame in (faulty_training_df, faulty_testing_df))\n",
    "\n",
    "# Alternative: train and test on fault-free and faulty runs together.\n",
    "# train_df = shuffle(pd.concat([fault_free_training_df, faulty_training_df]))\n",
    "# test_df = shuffle(pd.concat([fault_free_testing_df, faulty_testing_df]))\n",
    "\n",
    "# Optional cleanup of the source DataFrames once they are no longer needed:\n",
    "# del fault_free_testing_df\n",
    "# del fault_free_training_df\n",
    "# del faulty_testing_df\n",
    "# del faulty_training_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two hidden layers of 10 units each, fed by one numeric column per feature.\n",
    "# NOTE(review): 'faultNumber' looks like a class label, so a classifier may fit\n",
    "# better than a regressor -- confirm before relying on these predictions.\n",
    "regressor = tf.estimator.DNNRegressor(\n",
    "    hidden_units=[10, 10],\n",
    "    feature_columns=get_feature_cols(train_df),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train for up to 500 steps on input that repeats the training frame for 10 epochs.\n",
    "regressor.train(\n",
    "    input_fn=get_input_fn(train_df, num_epochs=10),\n",
    "    steps=500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on the first few test rows in their stored order (single pass, no shuffling).\n",
    "NUM_PREVIEW_ROWS = 10\n",
    "y = regressor.predict(\n",
    "    input_fn=get_input_fn(test_df[:NUM_PREVIEW_ROWS], num_epochs=1, shuffle=False))\n",
    "# Take as many predictions as rows were fed in, so they line up one-to-one with\n",
    "# the actual labels of those rows.\n",
    "predictions = list(p[\"predictions\"] for p in itertools.islice(y, NUM_PREVIEW_ROWS))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round (rather than truncate) each regression output to the nearest fault number.\n",
    "predicted_faults = [int(round(float(p[0]))) for p in predictions]\n",
    "# Show exactly as many actual labels as there are predictions so the lists align.\n",
    "actual_faults = test_df['faultNumber'][:len(predictions)].values\n",
    "\n",
    "# Single-argument print(...) gives the same output under Python 2 and Python 3.\n",
    "print(\"Prediction: \" + str(predicted_faults))\n",
    "print(\"Actual: \" + str(actual_faults))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
